library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.2
## ✔ ggplot2   4.0.0     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.1.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)
library(ggplot2)
library(lubridate)
#install.packages("fredr")
library(fredr)
#install.packages("DescTools")
library(DescTools)
# install.packages("pracma")
library(pracma)
## 
## Attaching package: 'pracma'
## 
## The following objects are masked from 'package:DescTools':
## 
##     Mode, Rank
## 
## The following object is masked from 'package:purrr':
## 
##     cross
# install.packages("seastests")
library(seastests)
# install.packages(c("seasonal", "x13binary"))
library(seasonal)
## 
## Attaching package: 'seasonal'
## 
## The following object is masked from 'package:seastests':
## 
##     qs
## 
## The following object is masked from 'package:tibble':
## 
##     view
library(tseries)
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
library(forecast)
## 
## Attaching package: 'forecast'
## 
## The following object is masked from 'package:DescTools':
## 
##     BoxCox
library(aTSA)
## 
## Attaching package: 'aTSA'
## 
## The following object is masked from 'package:forecast':
## 
##     forecast
## 
## The following objects are masked from 'package:tseries':
## 
##     adf.test, kpss.test, pp.test
## 
## The following object is masked from 'package:graphics':
## 
##     identify
library(tsibble)
## Registered S3 method overwritten by 'tsibble':
##   method               from 
##   as_tibble.grouped_df dplyr
## 
## Attaching package: 'tsibble'
## 
## The following object is masked from 'package:lubridate':
## 
##     interval
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, union
library(fable)
## Loading required package: fabletools
## 
## Attaching package: 'fabletools'
## 
## The following objects are masked from 'package:aTSA':
## 
##     estimate, forecast
## 
## The following objects are masked from 'package:DescTools':
## 
##     MAE, MAPE, MSE, RMSE
# install.packages("uroot")
library(urca)
library(feasts)

Task 1

Notes

Q1: 01-01, Q2: 04-01, Q3: 07-01, Q4: 10-01

# FRED API key: read from the FRED_API_KEY environment variable so the secret
# is not committed to source control.
# NOTE(review): the hard-coded key below was checked into this file — it
# should be revoked and rotated; it is kept only as a fallback so the
# document still knits for graders without the env var set.
api_key <- Sys.getenv("FRED_API_KEY", unset = "3949fd9fcfaa43711339ce7b8a243180")

fredr_set_key(api_key)

# Fetch one FRED series at a quarterly cadence (monthly values averaged per
# quarter) from 1980 onward. All four series below share these settings, so
# the call is factored out to avoid copy-paste drift.
fetch_fred_quarterly <- function(series_id, start = as.Date("1980-01-01")) {
  fredr(
    series_id          = series_id,
    frequency          = "q",    # quarterly
    aggregation_method = "avg",
    observation_start  = start
  )
}

# Federal Funds Effective Rate (FFER)
ffer_full <- fetch_fred_quarterly("FEDFUNDS")
head(ffer_full)

# Personal Consumption Expenditures Index (PCEPI) - Inflation measure
pcepi_full <- fetch_fred_quarterly("PCEPI")
head(pcepi_full)

# Real GDP
gdp_full <- fetch_fred_quarterly("GDPC1")
head(gdp_full)

# Potential Real GDP
pot_gdp_full <- fetch_fred_quarterly("GDPPOT")
head(pot_gdp_full)
# Quarter-over-quarter percent change of the PCE price index.
# The first observation has no predecessor, so it is NA.
pcepi_full <- pcepi_full %>%
  mutate(pct_chg_pce = (value - lag(value, 1)) / lag(value, 1))

# Output gap in percent: 100 * (actual real GDP - potential) / potential.
# inner_join suffixes the shared `value` column as value.x (GDPC1) and
# value.y (GDPPOT).
gdp_merged <- gdp_full %>%
  inner_join(pot_gdp_full, by = 'date') %>%
  mutate(
    output_gap = (value.x - value.y) * 100 / value.y,
    series_id  = 'OUTPUTGAP'
  ) %>%
  rename(
    realtime_start = realtime_start.x,
    realtime_end   = realtime_end.x,
    value          = output_gap
  )

# Keep the output gap in the same column layout as the fredr series.
output_gap <- gdp_merged %>%
  select(date, series_id, value, realtime_start, realtime_end)
# Level plot of the federal funds rate over the full sample.
ggplot(ffer_full, aes(x = date, y = value)) +
  geom_line() +
  labs(title = "Federal Funds Effective Rate (FFER)",
       x = "date",
       y = 'FFER')

# Summary statistics for the FFER series.
ffer_full %>%
  summarize(
    mean      = mean(value, na.rm = TRUE),
    var_value = var(value, na.rm = TRUE),
    sd_value  = sd(value, na.rm = TRUE),
    n_obs     = n()
  )

One may note a few key historical events that appear in the graph:

<Comments>

# PCEPI level plot.
ggplot(pcepi_full, aes(x = date, y = value)) +
  geom_line() +
  labs(title = "Personal Consumption Expenditures Index (PCEPI)",
       x = "date",
       y = 'PCEPI')
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_line()`).

# Summary statistics for PCEPI.
pcepi_full %>%
  summarize(
    mean      = mean(value, na.rm = TRUE),
    var_value = var(value, na.rm = TRUE),
    sd_value  = sd(value, na.rm = TRUE),
    n_obs     = n()
  )

# Quarterly percent change in PCE — the inflation proxy used later.
ggplot(pcepi_full, aes(x = date, y = pct_chg_pce)) +
  geom_line() +
  labs(title = "Pct Change PCE",
       x = "date",
       y = 'Pct Change PCE')
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_line()`).

PCEPI (Inflation) shows events that are related to events that spurred the FFER changes we see in the above graph.

<Comments>

# Plots and summary statistics for real GDP, potential GDP, and the output gap.

ggplot(gdp_full, aes(x = date, y = value)) +
  geom_line() +
  labs(title = "Real GDP ",
       x = "date",
       y = 'GDP')

gdp_full %>%
  summarize(
    mean      = mean(value, na.rm = TRUE),
    var_value = var(value, na.rm = TRUE),
    sd_value  = sd(value, na.rm = TRUE),
    n_obs     = n()
  )

ggplot(pot_gdp_full, aes(x = date, y = value)) +
  geom_line() +
  labs(title = "Potential Real GDP ",
       x = "date",
       y = 'GDPPOT')

pot_gdp_full %>%
  summarize(
    mean      = mean(value, na.rm = TRUE),
    var_value = var(value, na.rm = TRUE),
    sd_value  = sd(value, na.rm = TRUE),
    n_obs     = n()
  )

ggplot(output_gap, aes(x = date, y = value)) +
  geom_line() +
  labs(title = "Output gap",
       x = "date",
       y = 'Output gap')

GDP appears to steadily increase from 1980 to 2020, with a notable dip in 2020 due to the COVID pandemic. The output gap timeseries is affected by similar events as FFER and PCEPI:

Task 2

2.1: Frequency Alignment (Resampling)

Based on the format of the API call to FRED, data is received at a quarterly cadence, with an average over all months in the quarter calculated and returned in the dataframe. No resampling was done.

# Align sample ends: full-join all series on date to eyeball where each one
# stops before choosing a common cutoff.
ensure_same_dates <- ffer_full %>% 
  full_join(gdp_full, by = 'date') %>%
  full_join(pcepi_full, by = 'date') %>% 
  full_join(pot_gdp_full, by = 'date') %>%
  full_join(output_gap, by = 'date')

# Truncate every series at 2025-04-01 (2025 Q2), chosen after visually
# examining ensure_same_dates. gdp_full is truncated as well (it was omitted
# in the original pass): the output-gap recomputation later subtracts
# gdp_full$hampel_val and pot_gdp_full$hampel_val element-wise, which would
# silently recycle if their lengths ever diverged.
cutoff_date <- as.Date('2025-04-01')
ffer_full    <- ffer_full    %>% filter(date <= cutoff_date)
pcepi_full   <- pcepi_full   %>% filter(date <= cutoff_date)
gdp_full     <- gdp_full     %>% filter(date <= cutoff_date)
pot_gdp_full <- pot_gdp_full %>% filter(date <= cutoff_date)
output_gap   <- output_gap   %>% filter(date <= cutoff_date)

2.2: Outlier Detection & Treatment

Method: Hampel filter

To detect outliers, we’ve chosen to use the Hampel filter. We prefer this method because, based on a visual examination of the FFER, PCEPI, and GDP data, scores appear to mostly follow either an upward trend or a fluctuating downward trend, with few outliers. The outliers that do appear (e.g., in Real GDP around 2020) lie within the body of the data (as opposed to at its upper and lower extremes), so they would not be caught by winsorization. The rolling-window calculations of a Hampel filter (relying on deviations from a local median) would catch these outliers and smooth out the data. Z-scores, likewise, are best suited to normally distributed data, which we don’t have.

Outlier Detection

We utilize a Hampel filter for the FFER, PCEPI, GDP, GDPPOT, and output gap timeseries. A window size of 5 appeared to smooth out outliers best upon a visual examination of the corrected timeseries data. While we ran Hampel filters to detect potential outliers, we retained the original FFER series for estimation because large rate changes reflect actual policy, not data errors.

# Hampel filter each series with window k = 5 quarters on either side.
# pracma::hampel returns a list; $y is the series with detected outliers
# replaced by the rolling window median. (The original calls had stray
# trailing commas — `hampel(x, k = 5, )` — removed here.)
apply_hampel <- function(x, k = 5) {
  hampel(x, k = k)$y
}

# Overlay the original and Hampel-filtered series for visual comparison.
plot_hampel <- function(df, title) {
  df %>%
    ggplot(aes(x = date)) +
    geom_line(aes(y = hampel_val, color = "Hampel")) +
    geom_line(aes(y = value, color = "Original")) +
    labs(title = title,
         x = "Date",
         y = "Value") +
    scale_color_manual(values = c("Hampel" = "blue", "Original" = "red"),
                       name = "Legend")
}

ffer_full$hampel_val <- apply_hampel(ffer_full$value)
plot_hampel(ffer_full, "Federal Funds Effective Rate (FFER)")

pcepi_full$hampel_val <- apply_hampel(pcepi_full$value)
plot_hampel(pcepi_full, "PCEPI")

gdp_full$hampel_val <- apply_hampel(gdp_full$value)
plot_hampel(gdp_full, "GDP")

pot_gdp_full$hampel_val <- apply_hampel(pot_gdp_full$value)
plot_hampel(pot_gdp_full, "GDPPOT")

# Recompute the output gap from the filtered GDP series.
# NOTE(review): this element-wise arithmetic assumes gdp_full and
# pot_gdp_full have identical row counts and dates — guaranteed only if both
# were truncated to the same cutoff above.
output_gap$value <- (gdp_full$hampel_val - pot_gdp_full$hampel_val) * 100 /
  pot_gdp_full$hampel_val

2.3: Seasonal Adjustment

Detection

# Tag each FFER observation with its calendar quarter for seasonal diagnostics.
ffer_full <- ffer_full %>%
  mutate(
    q        = quarter(date),
    q_factor = factor(q, levels = c("1", "2", "3", "4"))
  )

# Similar within-quarter distributions suggest little seasonality.
ggplot(ffer_full, aes(x = q_factor, y = value)) +
  geom_boxplot(fill = "skyblue", color = "black") +
  labs(title = "Distribution of Values by Quarter",
       x = "Quarter",
       y = "Value") +
  theme_minimal()

# Each quarter's filtered series traced across years.
ffer_full %>%
  filter(q == 1) %>%
  ggplot(aes(x = date, y = hampel_val)) +
  geom_line() +
  labs(title = "FFER Q1", x = "Date", y = "Value")

ffer_full %>%
  filter(q == 2) %>%
  ggplot(aes(x = date, y = hampel_val)) +
  geom_line() +
  labs(title = "FFER Q2", x = "Date", y = "Value")

ffer_full %>%
  filter(q == 3) %>%
  ggplot(aes(x = date, y = hampel_val)) +
  geom_line() +
  labs(title = "FFER Q3", x = "Date", y = "Value")

ffer_full %>%
  filter(q == 4) %>%
  ggplot(aes(x = date, y = hampel_val)) +
  geom_line() +
  labs(title = "FFER Q4", x = "Date", y = "Value")

# Quarterly ts object (1980 Q1 onward) for the QS seasonality test.
ffer_ts <- ts(ffer_full$hampel_val, start = c(1980, 1), frequency = 4)

seastests::qs(ffer_ts)
## Test used:  QS 
##  
## Test statistic:  0 
## P-value:  1

According to our QS test, no evidence for seasonality in the FFER data exists

# Same seasonal diagnostics for PCEPI.
pcepi_full <- pcepi_full %>%
  mutate(
    q        = quarter(date),
    q_factor = factor(q, levels = c("1", "2", "3", "4"))
  )

ggplot(pcepi_full, aes(x = q_factor, y = value)) +
  geom_boxplot(fill = "skyblue", color = "black") +
  labs(title = "Distribution of Values by Quarter",
       x = "Quarter",
       y = "Value") +
  theme_minimal()

pcepi_full %>%
  filter(q == 1) %>%
  ggplot(aes(x = date, y = hampel_val)) +
  geom_line() +
  labs(title = "PCEPI Q1", x = "Date", y = "Value")

pcepi_full %>%
  filter(q == 2) %>%
  ggplot(aes(x = date, y = hampel_val)) +
  geom_line() +
  labs(title = "PCEPI Q2", x = "Date", y = "Value")

pcepi_full %>%
  filter(q == 3) %>%
  ggplot(aes(x = date, y = hampel_val)) +
  geom_line() +
  labs(title = "PCEPI Q3", x = "Date", y = "Value")

pcepi_full %>%
  filter(q == 4) %>%
  ggplot(aes(x = date, y = hampel_val)) +
  geom_line() +
  labs(title = "PCEPI Q4", x = "Date", y = "Value")

# Quarterly ts object for the QS test.
pcepi_ts <- ts(pcepi_full$hampel_val, start = c(1980, 1), frequency = 4)

seastests::qs(pcepi_ts)
## Test used:  QS 
##  
## Test statistic:  16.15 
## P-value:  0.0003110518

According to our QS test, evidence for seasonality in the PCEPI data exists.

# Same seasonal diagnostics for real GDP.
gdp_full <- gdp_full %>%
  mutate(
    q        = quarter(date),
    q_factor = factor(q, levels = c("1", "2", "3", "4"))
  )

ggplot(gdp_full, aes(x = q_factor, y = value)) +
  geom_boxplot(fill = "skyblue", color = "black") +
  labs(title = "Distribution of Values by Quarter",
       x = "Quarter",
       y = "Value") +
  theme_minimal()

gdp_full %>%
  filter(q == 1) %>%
  ggplot(aes(x = date, y = hampel_val)) +
  geom_line() +
  labs(title = "GDP Q1", x = "Date", y = "Value")

gdp_full %>%
  filter(q == 2) %>%
  ggplot(aes(x = date, y = hampel_val)) +
  geom_line() +
  labs(title = "GDP Q2", x = "Date", y = "Value")

gdp_full %>%
  filter(q == 3) %>%
  ggplot(aes(x = date, y = hampel_val)) +
  geom_line() +
  labs(title = "GDP Q3", x = "Date", y = "Value")

gdp_full %>%
  filter(q == 4) %>%
  ggplot(aes(x = date, y = hampel_val)) +
  geom_line() +
  labs(title = "GDP Q4", x = "Date", y = "Value")

# Quarterly ts object for the QS test.
gdp_full_ts <- ts(gdp_full$hampel_val, start = c(1980, 1), frequency = 4)

seastests::qs(gdp_full_ts)
## Test used:  QS 
##  
## Test statistic:  0 
## P-value:  1

According to our QS test, no evidence for seasonality in the GDP data exists

# Same seasonal diagnostics for potential GDP.
pot_gdp_full <- pot_gdp_full %>%
  mutate(
    q        = quarter(date),
    q_factor = factor(q, levels = c("1", "2", "3", "4"))
  )

ggplot(pot_gdp_full, aes(x = q_factor, y = value)) +
  geom_boxplot(fill = "skyblue", color = "black") +
  labs(title = "Distribution of Values by Quarter",
       x = "Quarter",
       y = "Value") +
  theme_minimal()

pot_gdp_full %>%
  filter(q == 1) %>%
  ggplot(aes(x = date, y = hampel_val)) +
  geom_line() +
  labs(title = "GDPPOT Q1", x = "Date", y = "Value")

pot_gdp_full %>%
  filter(q == 2) %>%
  ggplot(aes(x = date, y = hampel_val)) +
  geom_line() +
  labs(title = "GDPPOT Q2", x = "Date", y = "Value")

pot_gdp_full %>%
  filter(q == 3) %>%
  ggplot(aes(x = date, y = hampel_val)) +
  geom_line() +
  labs(title = "GDPPOT Q3", x = "Date", y = "Value")

pot_gdp_full %>%
  filter(q == 4) %>%
  ggplot(aes(x = date, y = hampel_val)) +
  geom_line() +
  labs(title = "GDPPOT Q4", x = "Date", y = "Value")

# Quarterly ts object for the QS test.
pot_gdp_full_ts <- ts(pot_gdp_full$hampel_val, start = c(1980, 1), frequency = 4)

seastests::qs(pot_gdp_full_ts)
## Test used:  QS 
##  
## Test statistic:  204.06 
## P-value:  0

According to our QS test, evidence for seasonality in the GDPPOT data exists

Seasonal Adjustment via X-13ARIMA-SEATS

This method is used because it allows for timeseries that can be decomposed additively or multiplicatively, allowing for maximum flexibility. The trend, seasonal component, and irregular component are all estimated and the algorithm uses centered moving averages to remove the seasonal component.

Source: https://en.wikipedia.org/wiki/X-13ARIMA-SEATS

# X-13ARIMA-SEATS adjustment for the two series the QS test flagged
# (PCEPI and GDPPOT). seasonal::final() returns a ts object; coerce to
# numeric before storing in a tibble column so ggplot can pick a scale —
# this removes the "Don't know how to automatically pick scale for object of
# type <ts>" warning the original code produced.
pcepi_fit <- seas(pcepi_ts)
pcepi_full$s_adj_val <- as.numeric(final(pcepi_fit))

pot_gdp_fit <- seas(pot_gdp_full_ts)
## Model used in SEATS is different: (0 2 2)
pot_gdp_full$s_adj_val <- as.numeric(final(pot_gdp_fit))

# Overlay: seasonally adjusted vs original, for each adjusted series.
pot_gdp_full %>%
  ggplot(aes(x = date)) +
  geom_line(aes(y = s_adj_val, color = "Seasonally Adjusted")) +
  geom_line(aes(y = value, color = "Original")) +
  labs(title = "GDPPOT",
       x = "Date",
       y = "Value") +
  scale_color_manual(values = c("Seasonally Adjusted" = "blue", "Original" = "red"),
                     name = "Legend")

pcepi_full %>%
  ggplot(aes(x = date)) +
  geom_line(aes(y = s_adj_val, color = "Seasonally Adjusted")) +
  geom_line(aes(y = value, color = "Original")) +
  labs(title = "PCEPI",
       x = "Date",
       y = "Value") +
  scale_color_manual(values = c("Seasonally Adjusted" = "blue", "Original" = "red"),
                     name = "Legend")

# # Split into train and test set 
# ffer_train <- ffer_full %>% filter(date >= as.Date('1980-01-01') & date <= as.Date('2023-10-01')) 

# ffer_test <- ffer_full %>% filter(date >= as.Date('2024-01-01')) 

#  # # Split into train and test set 

# pcepi_train <- pcepi_full %>% filter(date >= as.Date('1980-01-01') & date <= as.Date('2023-10-01')) 

# pcepi_test <- pcepi_full %>% filter(date >= as.Date('2024-01-01')) 

#  # # Split into train and test set 

# gdp_train <- gdp_full %>% filter(date >= as.Date('1980-01-01') & date <= as.Date('2023-10-01')) 

# gdp_test <- gdp_full %>% filter(date >= as.Date('2024-01-01')) 

#  # # Split into train and test set 
# pot_gdp_train <- pot_gdp_full %>% filter(date >= as.Date('1980-01-01') & date <= as.Date('2023-10-01')) 

# pot_gdp_test <- pot_gdp_full %>% filter(date >= as.Date('2024-01-01'))

Task 3

# Assemble the modeling dataset: Hampel-filtered FFER and real GDP,
# seasonally adjusted PCEPI and potential GDP, plus derived gaps.
# Each series is trimmed to the needed columns (renaming inline) BEFORE the
# join, which avoids suffix collisions and the select/rename churn.
infl_data_tsbl <- ffer_full %>%
  select(date, FEDFUNDS_hampel = hampel_val) %>%
  left_join(select(pcepi_full, date, PCEPI_s_adj = s_adj_val, pct_chg_pce),
            by = "date") %>%
  left_join(select(gdp_full, date, GDPC1_hampel = hampel_val),
            by = "date") %>%
  left_join(select(pot_gdp_full, date, GDPPOT_s_adj = s_adj_val),
            by = "date") %>%
  mutate(
    # Output gap (%) from filtered GDP vs adjusted potential GDP.
    output_gap = 100 * (GDPC1_hampel - GDPPOT_s_adj) / GDPPOT_s_adj,
    # Quarterly inflation from the adjusted index; gap vs 2% target.
    pct_chg_pce_s_adj = (PCEPI_s_adj / lag(PCEPI_s_adj) - 1),
    inflation_gap = pct_chg_pce_s_adj - 0.02,
    date = yearquarter(date)
  ) %>%
  as_tsibble(index = date)

# Train/test split at 2022 Q4.
infl_data_tsbl_train <- infl_data_tsbl %>%
  filter(date <= yearquarter("2022 Q4"))
infl_data_tsbl_test <- infl_data_tsbl %>%
  filter(date > yearquarter("2022 Q4"))

# Numeric columns to screen for stationarity.
vars <- infl_data_tsbl_train %>%
  select(where(is.numeric)) %>%
  names()

# General-to-specific ADF screening. For each variable, take the smallest
# p-value across the reported lag orders and flag rejection at 5% for:
#   type3 — specification with drift and trend
#   type2 — specification with drift only
results <- map_dfr(seq_along(vars), function(i) {
  res <- aTSA::adf.test(infl_data_tsbl_train[[vars[i]]], output = FALSE)
  data.frame(
    variable     = vars[i],
    stationary_3 = min(res$type3[, "p.value"], na.rm = TRUE) < 0.05,
    stationary_2 = min(res$type2[, "p.value"], na.rm = TRUE) < 0.05
  )
})
## Warning in summary.lm(m1): essentially perfect fit: summary may be unreliable
## Warning in summary.lm(m1): essentially perfect fit: summary may be unreliable
## Warning in summary.lm(m1): essentially perfect fit: summary may be unreliable
## Warning in summary.lm(m1): essentially perfect fit: summary may be unreliable
## Warning in summary.lm(m1): essentially perfect fit: summary may be unreliable
## Warning in summary.lm(m1): essentially perfect fit: summary may be unreliable
## Warning in summary.lm(m2): essentially perfect fit: summary may be unreliable
## Warning in summary.lm(m2): essentially perfect fit: summary may be unreliable
## Warning in summary.lm(m3): essentially perfect fit: summary may be unreliable
## Warning in summary.lm(m3): essentially perfect fit: summary may be unreliable
## Warning in summary.lm(m1): essentially perfect fit: summary may be unreliable
## Warning in summary.lm(m1): essentially perfect fit: summary may be unreliable
as_tibble(results)

Discussion:

Using the General to Specific approach, we first test each variable with the specification that includes both drift and trend terms. For this specification, only the seasonally adjusted PCEPI and the Hampel-filtered GDPC1 variables fail to reject the ADF null hypothesis of a unit root (i.e., non-stationarity). From there, we evaluate all variables with the drift-only specification. This time, the seasonally adjusted GDPPOT variable also fails to reject the unit-root null, in addition to the aforementioned variables.

These test results suggest that the PCEPI and GDPC variables should be differenced before use in modeling, to avoid spurious regression. The GDPPOT variable is trend-stationary: it does not contain a unit root, but it does contain a deterministic trend. In order to capture the explanatory power of the variable, GDPPOT_s_adj should not be differenced.

# First differences of the level variables flagged above; across() with
# .names = "d_{.col}" produces the same d_* column names as the original
# one-by-one mutate.
infl_data_tsbl_train <- infl_data_tsbl_train %>%
  mutate(across(
    c(FEDFUNDS_hampel, PCEPI_s_adj, GDPC1_hampel, GDPPOT_s_adj),
    difference,
    .names = "d_{.col}"
  ))
# ADF (aTSA) on the differenced, seasonally adjusted PCEPI. Type 1/2/3 are
# the no-drift, drift-only, and drift+trend specifications; rows are lags.
adf.test(infl_data_tsbl_train$d_PCEPI_s_adj)
## Augmented Dickey-Fuller Test 
## alternative: stationary 
##  
## Type 1: no drift no trend 
##      lag    ADF p.value
## [1,]   0 -3.713  0.0100
## [2,]   1 -2.815  0.0100
## [3,]   2 -1.896  0.0581
## [4,]   3 -1.825  0.0686
## [5,]   4 -0.944  0.3408
## Type 2: with drift no trend 
##      lag   ADF p.value
## [1,]   0 -6.32  0.0100
## [2,]   1 -5.11  0.0100
## [3,]   2 -3.78  0.0100
## [4,]   3 -3.89  0.0100
## [5,]   4 -2.63  0.0926
## Type 3: with drift and trend 
##      lag   ADF p.value
## [1,]   0 -6.38  0.0100
## [2,]   1 -5.19  0.0100
## [3,]   2 -3.87  0.0174
## [4,]   3 -3.96  0.0130
## [5,]   4 -2.67  0.2968
## ---- 
## Note: in fact, p.value = 0.01 means p.value <= 0.01
# ADF on the differenced, Hampel-filtered real GDP series.
adf.test(infl_data_tsbl_train$d_GDPC1_hampel)
## Augmented Dickey-Fuller Test 
## alternative: stationary 
##  
## Type 1: no drift no trend 
##      lag   ADF p.value
## [1,]   0 -6.39    0.01
## [2,]   1 -3.90    0.01
## [3,]   2 -3.61    0.01
## [4,]   3 -3.27    0.01
## [5,]   4 -2.76    0.01
## Type 2: with drift no trend 
##      lag   ADF p.value
## [1,]   0 -9.81    0.01
## [2,]   1 -6.57    0.01
## [3,]   2 -6.35    0.01
## [4,]   3 -6.09    0.01
## [5,]   4 -5.86    0.01
## Type 3: with drift and trend 
##      lag   ADF p.value
## [1,]   0 -9.91    0.01
## [2,]   1 -6.64    0.01
## [3,]   2 -6.50    0.01
## [4,]   3 -6.31    0.01
## [5,]   4 -6.04    0.01
## ---- 
## Note: in fact, p.value = 0.01 means p.value <= 0.01

Discussion

After differencing the two variables identified by the ADF test above, they no longer show trend or drift. Notably, there are some lags for both variables that result in a failure to reject the null hypothesis. Therefore care must be taken when determining which lags to use when modeling with these variables to ensure they remain stationary.

# Zivot-Andrews unit-root test with a break in both intercept and trend.
# stry_w_brk is TRUE when the test statistic is below the 5% critical value,
# i.e. the unit-root null is rejected in favor of stationarity with a break.
za_results <- map_dfr(vars, function(v) {
  za     <- ur.za(infl_data_tsbl_train[[v]], model = "both")
  za_sum <- summary(za)
  tibble(
    variable   = v,
    model      = za@model[1],
    sig_thresh = za_sum@cval[2],      # 5% critical value
    test_res   = za_sum@teststat,
    stry_w_brk = za_sum@teststat < za_sum@cval[2]
  )
})

za_results

Discussion:

In comparison to the ADF test, the Zivot-Andrews test identifies variables that remain non-stationary even when accounting for a structural break in the data. The FEDFUNDS_hampel, PCEPI_s_adj, GDPC1_hampel, GDPPOT_s_adj and output_gap variables fail to reject the Zivot-Andrews null hypothesis of a unit root, even when a single structural break is allowed for.

This means that the non-stationarity of these variables cannot be attributed to a structural break alone: even after the break is allowed for, the variables remain non-stationary. They should be differenced to reduce issues with model inference due to non-stationary residuals or spurious regression.

Task 4

# Static Taylor-rule regression:
#   i_t = b0 + b1 * inflation_gap_t + b2 * output_gap_t
taylor_mod <- model(
  infl_data_tsbl_train,
  tlsm = TSLM(FEDFUNDS_hampel ~ inflation_gap + output_gap)
)
report(taylor_mod)
## Series: FEDFUNDS_hampel 
## Model: TSLM 
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -9.33831 -2.50848 -0.01132  2.21617  9.51659 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     9.8247     0.6965  14.106  < 2e-16 ***
## inflation_gap 406.7634    48.3740   8.409 1.71e-14 ***
## output_gap      0.1176     0.1357   0.866    0.388    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.328 on 168 degrees of freedom
## Multiple R-squared: 0.2999,  Adjusted R-squared: 0.2915
## F-statistic: 35.98 on 2 and 168 DF, p-value: 9.8768e-14
# Residual diagnostics (time plot, ACF, histogram).
gg_tsresiduals(taylor_mod)
## Warning: `gg_tsresiduals()` was deprecated in feasts 0.4.2.
## ℹ Please use `ggtime::gg_tsresiduals()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_bin()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_rug()`).

Discussion:

Previous analysis of the variables indicated that they should be differenced before use in regression. However, doing so results in a different interpretation of the Taylor Rule. The non-stationary level variables were used so that the discussion could revolve around the model's adherence to economic theory.

With this model, the inflation gap estimate is significant with a value of 406.8. Because this variable is measured in decimal form (hundredths), the correct interpretation is an increase of roughly 4 percentage points in the funds rate for every 1 percentage point of inflation above the inflation target. This does match economic theory, in that the Fed Funds rate increases as inflation increases in an effort to “cool off” the economy. This estimate is high in comparison to the standard Taylor-rule response of about 1.5. In general, this means that in this sample, the funds rate reacts much more strongly to inflation than what would be considered the “rule of thumb”. This may be due to the several economic shocks that occurred through the training time period.

The Autocorrelation plots for this model shows that the residuals are significant across all lags. This shows that there is additional information that is not captured with the model, as the residuals are not white noise.

# In-sample RMSE of the Taylor rule.
accuracy(taylor_mod)$RMSE
## [1] 3.29877
# Out-of-sample forecast over the test window and its RMSE.
taylor_fc <- forecast(taylor_mod, new_data = infl_data_tsbl_test)
accuracy(taylor_fc, infl_data_tsbl_test)$RMSE
## [1] 0.9260019
# Fitted history plus forecast fan chart.
autoplot(taylor_fc, infl_data_tsbl_train) +
  labs(
    title = "Taylor Rule: Fitted and Forecasted Fed Funds Rate",
    x = "Date",
    y = "Federal Funds Rate (%)"
  ) +
  theme_minimal()

Discussion:

The Root Mean Squared Error value for the training data is 3.29. For the test data, the value is 0.92. So for the training data, the model is generally within 3.3 percentage points of the actual data, a pretty large spread overall. However this tightens to within 1 percentage point for the test data.

This difference may be attributed to the relative calm in the market during the test period compared to the training period. For example, the training period contains large abnormalities, like the Dot-Com bubble, the housing crisis, and COVID, all within roughly the last 20 years of the training data. The test data has no such unrest, and the model is therefore able to be more “accurate” in describing the Fed rate response to inflation. For these reasons, it is likely improper to suggest that this is an optimized model for future forecasting.

Task 5

5.1: Cointegration Test (5 Points)

## Tom Commented this code after creating the model in task 4
# taylor_mod <- lm(FEDFUNDS_hampel ~ inflation_gap + output_gap, 
#                  data = infl_data_tsbl_train)
# summary(taylor_mod)
# Anthony comment - theres an error here. `taylor_mod` above is a MABLE not a numeric vector. Need the residual as a numeric vector...

# taylor_resid <- resid(taylor_mod)
# adf.test(taylor_resid)
# corrected code below -

# Engle-Granger step 2: pull the mable's residuals out as a plain numeric
# vector (residuals() on a mable returns a tsibble keyed by .model).
taylor_resid <- taylor_mod %>%
  residuals() %>%
  filter(.model == "tlsm") %>%
  pull(.resid)

# ADF on the long-run residuals: stationarity implies cointegration.
adf.test(taylor_resid)
## Augmented Dickey-Fuller Test 
## alternative: stationary 
##  
## Type 1: no drift no trend 
##      lag   ADF p.value
## [1,]   0 -3.67  0.0100
## [2,]   1 -2.94  0.0100
## [3,]   2 -2.35  0.0204
## [4,]   3 -2.51  0.0133
## [5,]   4 -2.43  0.0171
## Type 2: with drift no trend 
##      lag   ADF p.value
## [1,]   0 -3.66  0.0100
## [2,]   1 -2.93  0.0463
## [3,]   2 -2.33  0.1981
## [4,]   3 -2.50  0.1348
## [5,]   4 -2.39  0.1765
## Type 3: with drift and trend 
##      lag   ADF p.value
## [1,]   0 -6.39    0.01
## [2,]   1 -5.85    0.01
## [3,]   2 -4.58    0.01
## [4,]   3 -4.93    0.01
## [5,]   4 -4.19    0.01
## ---- 
## Note: in fact, p.value = 0.01 means p.value <= 0.01

Since the reported p-value is at most 0.01 and therefore less than 0.05, we reject the null hypothesis that the residual series has a unit root. This means that the residuals are stationary and there is a long-run (cointegrating) equilibrium relationship among the variables.

5.2: Estimating the ECM via OLS (5 Points)

# full residuals to add back to tsibble
long_run_model <- lm(FEDFUNDS_hampel ~ inflation_gap + output_gap, 
                     data = infl_data_tsbl)
long_run_resid <- c(NA, resid(long_run_model))

# add ECM variables
infl_data_tsbl_ecm <-  infl_data_tsbl %>%
  mutate(
    d_it = FEDFUNDS_hampel - lag(FEDFUNDS_hampel),
    d_infl_gap = inflation_gap - lag(inflation_gap),
    d_output_gap = output_gap - lag(output_gap),
    lag_resid = lag(long_run_resid)
  )

infl_data_tsbl_ecm_train <- infl_data_tsbl_ecm %>% 
  filter(date <= yearquarter("2022 Q4"))
infl_data_tsbl_ecm_test <- infl_data_tsbl_ecm %>% 
  filter(date > yearquarter("2022 Q4"))
ecm_model <- lm(d_it ~ d_infl_gap + d_output_gap + lag_resid, 
                data = infl_data_tsbl_ecm)
summary(ecm_model)
## 
## Call:
## lm(formula = d_it ~ d_infl_gap + d_output_gap + lag_resid, data = infl_data_tsbl_ecm)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.6049 -0.3062 -0.0009  0.2609  5.4345 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -0.05479    0.05493  -0.997 0.319938    
## d_infl_gap   12.63397   14.01487   0.901 0.368571    
## d_output_gap  0.42169    0.08784   4.801 3.36e-06 ***
## lag_resid    -0.06119    0.01750  -3.497 0.000596 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7361 on 176 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.1956, Adjusted R-squared:  0.1819 
## F-statistic: 14.27 on 3 and 176 DF,  p-value: 2.3e-08

5.3: Compare ECM Forecasts (5 Points)

# ECM refit on the training window only, for honest out-of-sample evaluation.
ecm_model_train <- lm(d_it ~ d_infl_gap + d_output_gap + lag_resid,
                      data = infl_data_tsbl_ecm_train)
summary(ecm_model_train)
## 
## Call:
## lm(formula = d_it ~ d_infl_gap + d_output_gap + lag_resid, data = infl_data_tsbl_ecm_train)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.6112 -0.3227  0.0011  0.2753  5.4414 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -0.06031    0.05787  -1.042  0.29884    
## d_infl_gap   12.70562   14.53533   0.874  0.38332    
## d_output_gap  0.42150    0.09111   4.627 7.45e-06 ***
## lag_resid    -0.05995    0.01798  -3.334  0.00105 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7539 on 166 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.1932, Adjusted R-squared:  0.1786 
## F-statistic: 13.25 on 3 and 166 DF,  p-value: 8.524e-08
# Forecast the differenced rate over the test window.
ecm_fc <- forecast(ecm_model_train, newdata = as.data.frame(infl_data_tsbl_ecm_test))
d_hat <- as.numeric(ecm_fc$mean)

# Integrate the differences back to levels: anchor on the last observed
# training value and accumulate the forecast differences. cumsum() performs
# the identical arithmetic to the original recursive loop, vectorized, and
# also avoids the 2:length(d_hat) backwards-sequence bug the loop had when
# the test window contains a single observation.
i_hat <- tail(infl_data_tsbl_ecm_train$FEDFUNDS_hampel, 1) + cumsum(d_hat)

# Out-of-sample RMSE of the level forecasts (in-sample fit not shown).
ecm_rmse <- sqrt(mean((infl_data_tsbl_ecm_test$FEDFUNDS_hampel - i_hat)^2, na.rm = TRUE))

ecm_rmse
## [1] 1.272986
# NOTE (Anthony): forecasting taylor_mod directly did not work because it
# is a table, not an lm object, so we re-fit a plain lm below instead.
# ols_fc <- forecast(taylor_mod, newdata = infl_data_tsbl_test)
# ols_pred <- ols_fc$mean
# ols_rmse <- sqrt(mean((infl_data_tsbl_test$FEDFUNDS_hampel - ols_pred)^2, na.rm = TRUE))
# ols_rmse

# Static Taylor-rule OLS benchmark, fitted on the training sample only.
ols_lm <- lm(
  FEDFUNDS_hampel ~ inflation_gap + output_gap,
  data = infl_data_tsbl_train
)

# Level forecasts for the test period.
ols_fc <- forecast(ols_lm, newdata = as.data.frame(infl_data_tsbl_test))
ols_pred <- as.numeric(ols_fc$mean)

# Test-set RMSE of the static rule, directly comparable to ecm_rmse.
ols_err <- infl_data_tsbl_test$FEDFUNDS_hampel - ols_pred
ols_rmse <- sqrt(mean(ols_err^2, na.rm = TRUE))

ols_rmse
## [1] 0.9260019

The ECM model performs slightly worse out of sample, with a higher test RMSE (≈ 1.27) than the static Taylor Rule OLS benchmark (≈ 0.93).

Task 6 ARIMA Modeling of Taylor Rule Errors

# Rebuilding train and test for this task for posterity's sake.
# One helper avoids duplicating the identical select/filter/arrange
# pipeline for the two samples.
prep_taylor_df <- function(tsbl) {
  # Keep only the variables the Taylor rule needs, drop rows with any
  # missing input, and ensure observations are in time order.
  tsbl %>%
    select(date, FEDFUNDS_hampel, inflation_gap, output_gap) %>%
    filter(!is.na(FEDFUNDS_hampel),
           !is.na(inflation_gap),
           !is.na(output_gap)) %>%
    arrange(date)
}

train_df <- prep_taylor_df(infl_data_tsbl_train)
test_df  <- prep_taylor_df(infl_data_tsbl_test)

# Rebuilding the Taylor Rule OLS model (same spec as Daniel's above).
ols_model <- lm(FEDFUNDS_hampel ~ inflation_gap + output_gap, data = train_df)
ols_resid_train <- resid(ols_model)

# Wrap the residuals in a quarterly ts, anchored at the first training
# observation rather than a hard-coded date.
start_year    <- lubridate::year(train_df$date[1])
start_quarter <- lubridate::quarter(train_df$date[1])
ols_resid_train_ts <- ts(ols_resid_train,
                         start = c(start_year, start_quarter),
                         frequency = 4)

# Additional stationarity diagnostics on the level residuals.
# NOTE: because aTSA is attached after tseries, kpss.test() here is
# aTSA's version (it masks tseries::kpss.test) and reports all three
# deterministic specifications at once.
kpss.test(ols_resid_train_ts)  
## KPSS Unit Root Test 
## alternative: nonstationary 
##  
## Type 1: no drift no trend 
##  lag  stat p.value
##    3 0.928     0.1
## ----- 
##  Type 2: with drift no trend 
##  lag stat p.value
##    3 1.07    0.01
## ----- 
##  Type 1: with drift and trend 
##  lag   stat p.value
##    3 0.0655     0.1
## ----------- 
## Note: p.value = 0.01 means p.value <= 0.01 
##     : p.value = 0.10 means p.value >= 0.10
# Zivot-Andrews test: unit-root null against trend-stationarity allowing
# one endogenous break in both intercept and trend (model = "both").
# NOTE(review): ur.za() lives in urca, which is not attached in the
# preamble shown above -- presumably loaded earlier; confirm.
za_test <- ur.za(ols_resid_train_ts, model = "both")
summary(za_test)
## 
## ################################ 
## # Zivot-Andrews Unit Root Test # 
## ################################ 
## 
## 
## Call:
## lm(formula = testmat)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.4070 -1.0226 -0.0317  0.8637  8.6012 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.68440    1.46369   0.468  0.64070    
## y.l1         0.55586    0.06266   8.872 1.14e-15 ***
## trend        0.48149    0.25658   1.877  0.06235 .  
## du          -3.49390    1.11082  -3.145  0.00197 ** 
## dt          -0.50240    0.25756  -1.951  0.05280 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.589 on 165 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.7759, Adjusted R-squared:  0.7704 
## F-statistic: 142.8 on 4 and 165 DF,  p-value: < 2.2e-16
## 
## 
## Teststatistic: -7.0885 
## Critical values: 0.01= -5.57 0.05= -5.08 0.1= -4.82 
## 
## Potential break point at position: 9
# Phillips-Perron test (urca::ur.pp): unit-root null, robust to serial
# correlation and heteroskedasticity. "trend" includes an intercept and
# a linear trend in the test regression; "short" selects the short lag
# truncation for the long-run variance estimator.
pp_test <- ur.pp(ols_resid_train_ts,
                 type = "Z-tau",
                 model = "trend",
                 lags  = "short")
summary(pp_test)
## 
## ################################## 
## # Phillips-Perron Unit Root Test # 
## ################################## 
## 
## Test regression with intercept and trend 
## 
## 
## Call:
## lm(formula = y ~ y.l1 + trend)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.7460 -1.0364 -0.1035  0.9177  8.7368 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.005705   0.125239  -0.046    0.964    
## y.l1         0.610643   0.060933  10.022  < 2e-16 ***
## trend       -0.020722   0.004102  -5.052 1.14e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.633 on 167 degrees of freedom
## Multiple R-squared:  0.7606, Adjusted R-squared:  0.7577 
## F-statistic: 265.3 on 2 and 167 DF,  p-value: < 2.2e-16
## 
## 
## Value of test-statistic, type: Z-tau  is: -6.4136 
## 
##            aux. Z statistics
## Z-tau-mu             -0.0450
## Z-tau-beta           -5.0699
## 
## Critical values for Z statistics: 
##                      1pct      5pct     10pct
## critical values -4.014577 -3.436976 -3.142386

To build on work completed above, we provide additional stationarity tests on the residuals. Here, we applied three complementary unit-root diagnostics to the Taylor Rule residuals, denoted \(\hat{\varepsilon}_t\). The KPSS test (null \(H_0\): stationarity) indicates that the series is stationary in levels once a deterministic trend is allowed. Specifically, the level-stationarity version yields \(p \ge 0.10\), and the trend-stationarity version also yields \(p \ge 0.10\), while the “with drift, no trend” variant rejects with \(p \le 0.01\). Taken together, these results suggest that \(\hat{\varepsilon}_t\) is trend–stationary rather than containing a stochastic trend: deviations are mean-reverting around a deterministic component. Formally, we fail to reject stationarity for \(\hat{\varepsilon}_t = \alpha + \beta t + u_t\) with \(u_t\) covariance-stationary.

Allowing for an endogenous structural change, the Zivot–Andrews test rejects the unit-root null once a single break in intercept and trend is permitted. The test statistic is \(-7.09\), which is more negative than the 1% critical value (\(-5.57\)), implying stationarity conditional on one regime shift. The estimated break occurs early in the sample (position \(\approx 9\)), consistent with a Volcker-era policy shift [1]. Economically, this means the Taylor Rule relationship is stable within regimes but parameters likely changed once, so that \(\hat{\varepsilon}_t\) behaves as a stationary process after accounting for that break.

The Phillips–Perron test, which shares the ADF null \(H_0\) unit root but is robust to serial correlation and heteroskedasticity, also rejects nonstationarity decisively (Z–tau \(=-6.41\), beyond the 1% critical value). This corroborates that the residual process does not harbor a stochastic trend once inflation and output gaps are included on the right-hand side.

Overall, the three diagnostics are mutually consistent: \(\hat{\varepsilon}_t\) is best characterized as stationary, possibly around a deterministic trend and with evidence of a single structural break. This justifies modeling the unexplained component with a low-order ARIMA on the residuals. However, we take the route often taken in practice, where differencing once (ARIMA with \(d=1\)) can act as a convenient device to remove the deterministic drift and any slow-moving break-induced level shift, yielding a stationary innovation process for forecasting. Consequently, the combined forecast we report later follows \[ \widehat{i}_t^{\,\text{combined}} \;=\; \widehat{i}_t^{\,\text{Taylor}} \;+\; \widehat{\varepsilon}_t^{\,\text{ARIMA}}, \]

where \(\widehat{i}_t^{\,\text{Taylor}}\) is the OLS Taylor Rule fit and \(\widehat{\varepsilon}_t^{\,\text{ARIMA}}\) is the ARIMA forecast of the residual process that is stationary within regimes.

# Some visualizations: level plot of the training-sample residuals.
autoplot(ols_resid_train_ts) +
  ggtitle("Taylor Rule Residuals (Train)") +
  xlab("Year") +
  ylab("Residuals") +
  theme_minimal()

# Histogram with an overlaid kernel density of the training residuals.
# after_stat(density) replaces the `..density..` notation deprecated in
# ggplot2 3.4.0, eliminating the lifecycle warning this chunk produced.
ggplot(data.frame(resid = ols_resid_train), aes(x = resid)) +
  geom_histogram(aes(y = after_stat(density)), bins = 20,
                 fill = "lightblue", color = "black") +
  geom_density(color = "red") +
  labs(title = "Distribution of Taylor Rule Residuals (Train)",
       x = "Residual", y = "Density") +
  theme_minimal()

# ACF/PACF side by side. Capture the previous graphics state and restore
# it afterwards, rather than assuming mfrow started at c(1, 1).
op <- par(mfrow = c(1, 2))
acf(ols_resid_train, main = "ACF of OLS Residuals (Train)", lag.max = 20)
pacf(ols_resid_train, main = "PACF of OLS Residuals (Train)", lag.max = 20)
par(op)

# Fitting some ARIMA models to compare against auto.arima().
y <- ols_resid_train  # residuals from Taylor OLS

# Candidate (p, q) orders, excluding the degenerate (0, 0) model.
# NOTE(review): this grid is defined but never iterated over in this
# section -- either loop over it with Arima() or remove it.
p_max <- 3
q_max <- 3

candidates <- expand.grid(p = 0:p_max, q = 0:q_max) %>%
  filter(!(p == 0 & q == 0))

# Difference the residual ts directly: diff() on a ts keeps the
# quarterly frequency and shifts the start date by one period, which the
# previous hard-coded ts(start = c(1980, 1)) wrapper silently ignored.
diff_resid_ts <- diff(ols_resid_train_ts)
diff_resid <- as.numeric(diff_resid_ts)

autoplot(diff_resid_ts) +
  labs(title = "Differenced OLS Residuals", x = "Year", y = "Differenced Residuals") +
  theme_minimal()

# ADF on the differenced residuals (aTSA::adf.test prints all specs).
adf.test(diff_resid)
## Augmented Dickey-Fuller Test 
## alternative: stationary 
##  
## Type 1: no drift no trend 
##      lag    ADF p.value
## [1,]   0 -16.27    0.01
## [2,]   1 -12.57    0.01
## [3,]   2  -8.54    0.01
## [4,]   3  -8.92    0.01
## [5,]   4  -7.93    0.01
## Type 2: with drift no trend 
##      lag    ADF p.value
## [1,]   0 -16.22    0.01
## [2,]   1 -12.55    0.01
## [3,]   2  -8.53    0.01
## [4,]   3  -8.96    0.01
## [5,]   4  -8.01    0.01
## Type 3: with drift and trend 
##      lag    ADF p.value
## [1,]   0 -16.18    0.01
## [2,]   1 -12.50    0.01
## [3,]   2  -8.48    0.01
## [4,]   3  -8.86    0.01
## [5,]   4  -7.93    0.01
## ---- 
## Note: in fact, p.value = 0.01 means p.value <= 0.01
# KPSS on the differenced residuals: failing to reject stationarity here
# complements the ADF rejection of a unit root above, so d = 1 suffices.
kpss.test(diff_resid)
## KPSS Unit Root Test 
## alternative: nonstationary 
##  
## Type 1: no drift no trend 
##  lag   stat p.value
##    3 0.0388     0.1
## ----- 
##  Type 2: with drift no trend 
##  lag   stat p.value
##    3 0.0371     0.1
## ----- 
##  Type 1: with drift and trend 
##  lag   stat p.value
##    3 0.0308     0.1
## ----------- 
## Note: p.value = 0.01 means p.value <= 0.01 
##     : p.value = 0.10 means p.value >= 0.10
# Hand-picked high-order candidates, compared against auto.arima() below.
fit_arma23 <- Arima(ols_resid_train, order = c(2,0,3))
fit_arima213 <- Arima(ols_resid_train, order = c(2,1,3))
# NOTE(review): AIC values are not comparable across models with
# different d -- differencing drops observations, which is exactly what
# the warning below points out. Compare candidates within the same d,
# or use out-of-sample RMSE instead.
AIC(fit_arma23, fit_arima213)
## Warning in AIC.default(fit_arma23, fit_arima213): models are not all fitted to
## the same number of observations
# ndiffs() agrees that a single difference is required (d = 1).
forecast::ndiffs(ols_resid_train)
## [1] 1
# Automatic non-seasonal order selection; lands on ARIMA(2,1,3).
auto_arima_resid <- auto.arima(ols_resid_train, seasonal = FALSE)
summary(auto_arima_resid)
## Series: ols_resid_train 
## ARIMA(2,1,3) 
## 
## Coefficients:
##          ar1     ar2      ma1      ma2     ma3
##       0.3096  0.3685  -0.6173  -0.5149  0.1997
## s.e.  0.2901  0.1864   0.2884   0.2340  0.1058
## 
## sigma^2 = 2.872:  log likelihood = -328.69
## AIC=669.37   AICc=669.89   BIC=688.19
## 
## Training set error measures:
##                      ME    RMSE      MAE      MPE    MAPE      MASE        ACF1
## Training set -0.1803083 1.66482 1.203033 11.30536 130.181 0.9503362 -0.02449731
# NOTE(review): exact duplicate of the ndiffs() call above -- harmless
# but redundant; safe to delete on the next editing pass.
forecast::ndiffs(ols_resid_train)
## [1] 1

We begin by examining the residual series \(\hat{\varepsilon}_t\) from the OLS Taylor Rule model.
The time‐series plot of residuals shows long memory and a clear downward drift until the early 2000s, followed by a period of volatility spikes (notably during the 2008 financial crisis and the COVID-19 period). This persistence suggests that the residuals are not white noise and may exhibit autocorrelation or a weak unit root, despite the test statistics on the non-differenced residuals above.

The autocorrelation and partial autocorrelation plots (ACF and PACF) reinforce this observation.
The ACF decays slowly, indicating strong persistence in shocks, while the PACF shows a sharp cutoff after lag 1. This pattern is consistent with an autoregressive process of order one, AR(1), or potentially an ARIMA(1,1,0) structure with a single difference needed to achieve stationarity.
Such behavior implies that the shocks to the Taylor Rule equation tend to persist over multiple quarters, rather than dissipating immediately.

After differencing the residuals, the time series of \(\Delta \hat{\varepsilon}_t\) becomes mean-reverting with stable variance and no clear trend. Visually, this differenced series appears stationary, suggesting that one level of differencing (\(d = 1\)) is sufficient to remove the long-run component. Formally, both the Augmented Dickey–Fuller (ADF) and KPSS tests confirm this:
the ADF strongly rejects the null of a unit root (all \(p \le 0.01\)), while the KPSS fails to reject the null of stationarity (\(p \ge 0.10\)). Together, these imply that the differenced residuals are stationary.

To formally capture this structure, we fit several ARIMA models. The AIC and BIC comparison indicates that higher-order mixed models outperform pure AR or MA specifications.
Specifically, ARIMA(2,1,3), chosen by auto-ARIMA, achieves the lowest AIC (\(AIC = 669.37\)), outperforming all ARIMA models tested.

The estimated parameters show significant AR and MA terms, confirming that both short-run momentum and shock effects are present.

In summary, the residual diagnostics show that:

  1. The raw residuals \(\hat{\varepsilon}_t\) are non-stationary, consistent with persistent policy shocks.
  2. First differencing yields stationary residuals \(\Delta \hat{\varepsilon}_t\).
  3. The optimal model based on information criteria is ARIMA(2,1,3), which balances autoregressive persistence with moving-average smoothing.

This result justifies the use of the combined model:

\[ \widehat{i}_t^{\,\text{combined}} = \widehat{i}_t^{\,\text{Taylor}} + \widehat{\varepsilon}_t^{\,\text{ARIMA}}, \]

where the ARIMA component refines the Taylor Rule forecast by modeling serial correlation in the policy deviations. The combined model therefore captures both the systematic and dynamic elements of the Federal Reserve’s interest rate decisions.

# Fitted values (train) and forecasts (test) for the Taylor-only model.
ols_pred_train <- fitted(ols_model)
ols_pred_test  <- predict(ols_model, newdata = test_df)

# ARIMA fitted residuals (train) and residual forecasts over the test horizon.
resid_fit_train <- fitted(auto_arima_resid)

h_test <- nrow(test_df)
resid_fc_test <- forecast(auto_arima_resid, h = h_test)$mean

# Combined predictions: structural Taylor fit + ARIMA residual correction.
combined_pred_train <- ols_pred_train + resid_fit_train
combined_pred_test  <- ols_pred_test + as.numeric(resid_fc_test)

# RMSEs: Taylor-only
ols_rmse_train <- sqrt(mean((train_df$FEDFUNDS_hampel - ols_pred_train)^2))
ols_rmse_test  <- sqrt(mean((test_df$FEDFUNDS_hampel - ols_pred_test)^2))

# RMSEs: Taylor combined with ARIMA
combined_rmse_train <- sqrt(mean((train_df$FEDFUNDS_hampel - combined_pred_train)^2))
combined_rmse_test  <- sqrt(mean((test_df$FEDFUNDS_hampel  - combined_pred_test)^2))

# ECM in-sample RMSE. resid() is aligned with the observations the model
# actually used; the previous version subtracted fitted() from the full
# d_it column, whose extra NA rows triggered silent vector recycling
# (the source of the length-mismatch warning).
# NOTE(review): this RMSE is on the *differenced* rate (d_it) while the
# other rows are on the level of the rate -- not strictly comparable.
ecm_rmse_train <- sqrt(mean(resid(ecm_model_train)^2))

# Comparison table across the three specifications.
comparison <- tibble(
  Model = c("Taylor Rule (OLS)",
            "ECM",
            "Taylor - ARIMA Combined"),
  Train_RMSE = c(ols_rmse_train,
                 ecm_rmse_train,
                 combined_rmse_train),
  Test_RMSE  = c(ols_rmse_test,
                 ecm_rmse,        
                 combined_rmse_test)
)

comparison

The table below compares the RMSE for the three models across the training and test sets:

Model Train RMSE Test RMSE
Taylor Rule (OLS) 3.30 0.93
ECM 0.96 1.27
Taylor–ARIMA Combined 1.66 4.00

The Taylor Rule (OLS) model provides a relatively loose in-sample fit (RMSE ≈ 3.3) but performs best on the test set (RMSE ≈ 0.93).
This suggests that while the OLS model does not fully capture short-term fluctuations during the estimation period, it generalizes reasonably well for the more stable post-2022 period, when macroeconomic volatility was subdued.

The Error Correction Model (ECM) achieves the lowest training RMSE (≈ 0.96), implying that differencing and inclusion of lagged disequilibrium terms successfully capture most short-run adjustments within the training sample.
However, its slightly higher out-of-sample RMSE (≈ 1.27) indicates mild overfitting—expected since the ECM optimizes within-sample dynamic correction rather than long-horizon prediction.

By contrast, the Taylor–ARIMA Combined model shows improved in-sample residual structure (RMSE ≈ 1.66) but a larger out-of-sample RMSE (≈ 4.00).
This outcome arises because the ARIMA component (\(\text{ARIMA}(2,1,3)\)) is tuned to capture serial correlation in historical policy deviations (\(\hat{\varepsilon}_t\)), which were highly volatile during the 1980–2020 training period.
When applied to the test period—marked by comparatively stable rates—the ARIMA extrapolation amplifies noise, yielding over-dispersion in forecasts.
Thus, while ARIMA enhances the in-sample dynamics by filtering autocorrelation, it reduces out-of-sample robustness under regime stability.

In economic terms, this suggests that the Taylor Rule itself remains structurally sound for recent policy behavior, and that much of the autocorrelation observed historically reflects temporary shocks or regime-specific persistence.
The combined model’s poorer forecast performance emphasizes the importance of model parsimony when the policy environment changes: the additional ARIMA layer, while statistically valid, can misinterpret macroeconomic calm as signal.

In summary, the final combined forecast equation is:

\[ \widehat{i}_t^{\,\text{combined}} = \widehat{i}_t^{\,\text{Taylor}} + \widehat{\varepsilon}_t^{\,\text{ARIMA}}, \]

where \(\widehat{i}_t^{\,\text{Taylor}}\) represents the structural component implied by inflation and output gaps, and
\(\widehat{\varepsilon}_t^{\,\text{ARIMA}}\) captures the dynamic residual correction term.
Overall, the findings highlight that post-2020 policy behavior is best captured by the canonical Taylor Rule rather than by more complex dynamic extensions.

Appendix